This is an implementation of a feed-forward neural network using NumPy. The model takes a dataset containing the pixel values of clothing images and categorizes them into 10 different classes.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
# Load the clothing-image dataset: each row of *Data.csv is one image's
# pixel values; *Labels.csv holds the matching integer class labels.
# header=None because the CSV files have no header row.
train_data = pd.read_csv('trainData.csv', header=None)
train_lb = pd.read_csv('trainLabels.csv', header=None)
test_data = pd.read_csv('testData.csv', header=None)
test_lb = pd.read_csv('testLabels.csv', header=None)
# Notebook-style bare expression: displays the training dataframe in a cell.
train_data
# Collect the row index of the first sample of each of the 10 classes.
indexes = []
for i in range(10):
    indexes.append(train_lb.index[train_lb[0] == i].tolist()[0])
# Show one example image per class.
# Bug fix: the loop previously plotted raw rows 0..9 of train_data
# (train_data.iloc[i]) instead of the per-class rows collected above,
# so the images shown were not one per class.
for i in range(10):
    x = np.array(train_data.iloc[indexes[i]])
    x = x.reshape(28, 28)
    plt.figure(figsize=(2, 2))
    plt.imshow(x, cmap='gray')
    plt.xlabel(str(i+1)+' th class')
    plt.show()
# Count how many samples each class has in the train and test splits,
# then plot the two distributions side by side.
train_values = []
test_values = []
for i in range(10):
    train_values.append(train_lb.loc[train_lb[0] == i].shape[0])
    test_values.append(test_lb.loc[test_lb[0] == i].shape[0])
plt.figure(figsize=(8, 8))
plt.bar([i for i in range(10)], train_values, label='train', width=0.3)
plt.bar([i+0.3 for i in range(10)], test_values, label='test', width=0.3)
plt.legend()
plt.show()
Normalization scales all the data into the range 0 to 1. This lets us work with smaller numbers, which requires less processing and also helps prevent gradient explosion.
# Scale pixel values into [0, 1] and save the normalized copy.
t = train_data / 255
# Bug fix: DataFrame.to_csv has no 'headers' keyword (passing it raises a
# TypeError); the parameter is 'header'. index=False keeps the output file
# shaped like the input (pixel columns only, no index column).
t.to_csv('NtrainData.csv', header=False, index=False)
class Dataloader:
    """Iterable over (data, one-hot label) mini-batches.

    Args:
        data: array-like of shape (n_samples, n_features).
        labels: integer class labels, reshapeable to (n_samples,).
        n_classes: total number of classes (width of the one-hot vectors).
        batch_size: samples per batch; None yields the whole dataset at once.
        shuffle: when True, reshuffle the dataset at the start of each pass.
    """
    def __init__(self, data, labels, n_classes, batch_size=None, shuffle=False):
        assert len(data) == len(labels)
        self.__n_classes = n_classes
        self.__batch_size = batch_size
        self.__shuffle = shuffle
        self.__data = data
        self.__onehot_labels = self.__onehot(labels, self.__n_classes)
    def __onehot(self, labels, n_classes):
        # Bug fix: width the one-hot vectors with the declared class count.
        # Previously np.max(labels)+1 was used, so the encoding came out too
        # narrow whenever the highest classes were absent from `labels`.
        flat_labels = np.asarray(labels).reshape(-1)
        onehot_vectors = np.eye(n_classes)[flat_labels]
        return onehot_vectors
    def __shuffle_dataset(self):
        # Apply one random permutation to data and labels together.
        order = np.random.permutation(len(self.__data))
        self.__data = self.__data[order]
        self.__onehot_labels = self.__onehot_labels[order]
    def __iter__(self):
        if self.__shuffle:
            self.__shuffle_dataset()
        if self.__batch_size is None:
            # No batching requested: yield everything as a single batch.
            yield (np.matrix(self.__data), np.matrix(self.__onehot_labels))
            return
        for idx in range(0, len(self.__data), self.__batch_size):
            # The final slice may be shorter than batch_size.
            yield (np.matrix(self.__data[idx:idx+self.__batch_size]),
                   np.matrix(self.__onehot_labels[idx:idx+self.__batch_size]))
class Identical:
    """Identity activation: returns its input unchanged (as a float matrix)."""
    def __init__(self):
        pass
    def __val(self, matrix):
        # Copy into a float matrix so callers always receive the same dtype.
        return np.matrix(matrix, dtype=float)
    def derivative(self, matrix):
        # d/dx x = 1 everywhere: an all-ones matrix of the input's shape.
        as_float = np.matrix(matrix, dtype=float)
        return np.matrix(np.ones(np.shape(as_float)))
    def __call__(self, matrix):
        return self.__val(matrix)
class Relu:
    """Rectified linear unit: max(0, x) applied elementwise."""
    def __init__(self):
        pass
    def __f(self, x):
        # Scalar ReLU; broadcast over the matrix via np.vectorize below.
        return x if x > 0 else 0
    def __df(self, x):
        # Subgradient convention: the derivative at x == 0 is taken as 1.
        return 1 if x >= 0 else 0
    def __relu(self, matrix):
        elementwise = np.vectorize(self.__f)
        return elementwise(matrix)
    def derivative(self, matrix):
        elementwise = np.vectorize(self.__df)
        return elementwise(matrix)
    def __call__(self, matrix):
        return self.__relu(matrix)
class LeakyRelu:
    """Leaky ReLU activation: x for x >= 0, negative_slope * x otherwise.

    Unlike plain ReLU, negative inputs keep a small gradient so the
    corresponding neurons can still learn.
    """
    def __init__(self, negative_slope=0.01):
        # Bug fix: the constructor previously ignored its argument and
        # hard-coded the slope to 0.01.
        self.negative_slope = negative_slope
    def __f(self, x):
        return x if x >= 0 else x*self.negative_slope
    def __df(self, x):
        return 1 if x >= 0 else self.negative_slope
    def __val(self, matrix):
        func = np.vectorize(self.__f)
        leaky_relu_value = func(matrix)
        return leaky_relu_value
    def derivative(self, matrix):
        func = np.vectorize(self.__df)
        leaky_relu_derivative = func(matrix)
        return leaky_relu_derivative
    def __call__(self, matrix):
        return self.__val(matrix)
class Sigmoid:
    """Logistic sigmoid activation: 1 / (1 + exp(-x)) elementwise."""
    def __init__(self): pass
    def __f(self, x):
        # Numerically stable form. Bug fix: math.exp(-x) overflows (raises
        # OverflowError) for large negative x, so only exponentiate a
        # non-positive argument on either branch.
        if x >= 0:
            return 1/(1+math.exp(-x))
        z = math.exp(x)
        return z/(1+z)
    def __df(self, x):
        # sigma'(x) = sigma(x) * (1 - sigma(x)); compute sigma once.
        s = self.__f(x)
        return s*(1-s)
    def __val(self, matrix):
        func = np.vectorize(self.__f)
        sigmoid_value = func(matrix)
        return sigmoid_value
    def derivative(self, matrix):
        func = np.vectorize(self.__df)
        sigmoid_derivative = func(matrix)
        return sigmoid_derivative
    def __call__(self, matrix):
        return self.__val(matrix)
class Softmax:
    """Row-wise softmax for 2-D inputs (each row is one sample).

    NOTE(review): __val relies on np.matrix semantics, where axis
    reductions keep a 2-D shape so the division broadcasts row-wise;
    callers in this file always pass np.matrix.
    """
    def __init__(self): pass
    def __val(self, matrix):
        # Subtract the row max before exponentiating for numerical
        # stability; the shift cancels out in the ratio.
        mx = matrix.max(axis=1)
        numerator = np.exp(matrix - mx)
        denominator = np.sum(numerator, axis=-1)
        softmax_value = numerator/denominator
        return softmax_value
    def derivative(self, matrix):
        # Bug fix: previously returned an undefined name (NameError).
        # Returns the elementwise derivative s * (1 - s), i.e. the diagonal
        # of the softmax Jacobian. The full Jacobian is not needed in this
        # file because CrossEntropy.derivative folds softmax into its own
        # gradient (softmax(x) - target).
        s = self.__val(matrix)
        softmax_derivative = np.multiply(s, 1 - s)
        return softmax_derivative
    def __call__(self, matrix):
        return self.__val(matrix)
class CrossEntropy: #(with softmax)
    """Cross-entropy loss over raw scores; softmax is applied internally."""
    def __init__(self):
        pass
    def __val(self, true_val, expected_val):
        # Per-sample loss: -sum(target * log(softmax(scores))) along rows.
        assert np.shape(true_val) == np.shape(expected_val)
        probabilities = Softmax()(true_val)
        log_likelihood = np.multiply(np.log(probabilities), expected_val)
        return -1 * log_likelihood.sum(1)
    def derivative(self, true_val, expected_val):
        # Combined softmax + cross-entropy gradient: softmax(x) - target.
        assert np.shape(true_val) == np.shape(expected_val)
        return Softmax()(true_val) - expected_val
    def __call__(self, true_val, expected_val):
        return self.__val(true_val, expected_val)
class Layer:
    """One fully-connected layer: output = activation(input @ W + b).

    Caches the forward-pass intermediates so update_weights can run the
    backward pass for the most recent batch.
    """
    # Defaults for the uniform (low/high) and normal (mean/var) initializers.
    DEFAULT_LOW, DEFAULT_HIGH, DEFAULT_MEAN, DEFAULT_VAR = 0, 0.05, 0., 1.
    def __init__(self, input_size, output_size,
                 activation=Identical(), initial_weight='uniform', **initializing_parameters):
        # Fixed assertion message: this check concerns the weight
        # initializer name, not the activation function.
        assert type(initial_weight)==str, 'Undefined weight initialization function!'
        self.__weight_initializer_dict = {'uniform':self.__uniform_weight, 'normal':self.__normal_weight}
        assert initial_weight in self.__weight_initializer_dict, 'Undefined weight initialization function!'
        self.__n_neurons = output_size
        weight_initializer = self.__weight_initializer_dict[initial_weight]
        self.__weight = weight_initializer(input_size, self.__n_neurons, **initializing_parameters)
        self.__bias = weight_initializer(1, self.__n_neurons, **initializing_parameters)
        self.__activation = activation
        # Forward-pass caches consumed by update_weights.
        self.__last_input = None
        self.__last_activation_input = None
        self.__last_activation_output = None
        self.__last_activation_derivative = None
    def forward(self, layer_input):
        """Forward pass for a (batch, input_size) matrix; caches intermediates."""
        assert np.ndim(layer_input)==2
        assert np.size(self.__weight,0) == np.size(layer_input,1)
        self.__last_input = layer_input
        self.__last_activation_input = np.matmul(layer_input, self.__weight)
        self.__last_activation_input = self.__last_activation_input + self.__bias
        self.__last_activation_output = self.__activation(self.__last_activation_input)
        self.__last_activation_derivative = self.__activation.derivative(self.__last_activation_input)
        return self.__last_activation_output
    def update_weights(self, backprop_tensor, lr):
        """One SGD step from the upstream gradient; returns the gradient
        with respect to this layer's input for the previous layer."""
        assert np.ndim(backprop_tensor)==2
        assert np.size(backprop_tensor,0) == np.size(self.__last_activation_derivative,0)
        assert np.size(backprop_tensor,1) == self.__n_neurons
        # Gradient through the activation (elementwise chain rule).
        dEdY = np.multiply(backprop_tensor, self.__last_activation_derivative)
        dEdW = np.matmul(np.transpose(self.__last_input), dEdY)
        # Gradient w.r.t. this layer's input, passed back to the caller.
        backprop_tensor = np.matmul(dEdY, np.transpose(self.__weight))
        self.__weight = self.__weight - lr*dEdW
        # Bug fix: the bias gradient sums over the batch axis (axis=0).
        # The previous axis=1 produced a (batch, 1) column whose broadcast
        # turned the (1, n_neurons) bias into a (batch, n_neurons) matrix.
        dEdB = dEdY.sum(axis=0)
        self.__bias = self.__bias - lr*dEdB
        return backprop_tensor
    def __uniform_weight(self, dim1, dim2, **initializing_parameters):
        # Uniform init in [low, high); unknown extra keywords are ignored.
        low, high = self.DEFAULT_LOW, self.DEFAULT_HIGH
        if 'low' in initializing_parameters.keys(): low = initializing_parameters['low']
        if 'high' in initializing_parameters.keys(): high = initializing_parameters['high']
        weights = np.random.uniform(low=low, high=high, size=(dim1, dim2))
        return weights
    def __normal_weight(self, dim1, dim2, **initializing_parameters):
        # Normal init with given mean/variance; note np.random.normal takes
        # a standard deviation, hence the sqrt.
        mean, var = self.DEFAULT_MEAN, self.DEFAULT_VAR
        if 'mean' in initializing_parameters.keys(): mean = initializing_parameters['mean']
        if 'var' in initializing_parameters.keys(): var = initializing_parameters['var']
        weights = np.random.normal(mean, np.sqrt(var), (dim1, dim2))
        return weights
    def get_output(self):
        # Activation output of the most recent forward pass (None before any).
        return self.__last_activation_output
    @property
    def n_neurons(self): return self.__n_neurons
    @property
    def weight(self): return self.__weight
    @property
    def bias(self): return self.__bias
    @property
    def activation(self): return self.__activation
class FeedForwardNN:
    """A sequential fully-connected network trained with mini-batch SGD.

    Build with add_layer(), configure with set_training_param(), then fit().
    """
    def __init__(self, input_shape):
        self.__input_shape = input_shape
        self.__output_shape = None
        self.__layers_list = []
        self.__lr = None
        self.__loss = None
        # Per-epoch captures of the second-to-last layer's activations,
        # used later to visualize a 2-neuron bottleneck layer.
        self.train_output = []
        self.test_output = []
    def add_layer(self, n_neurons, activation=Relu(), initial_weight='uniform', **initializing_parameters):
        """Append a fully-connected layer of n_neurons to the network."""
        assert type(n_neurons)==int, "Invalid number of neurons for the layer!"
        assert n_neurons>0, "Invalid number of neurons for the layer!"
        n_prev_neurons = self.__input_shape if len(self.__layers_list)==0 else self.__layers_list[-1].n_neurons
        new_layer = Layer(n_prev_neurons, n_neurons, activation, initial_weight, **initializing_parameters)
        self.__layers_list.append(new_layer)
        self.__output_shape = self.__layers_list[-1].n_neurons
    def set_training_param(self, loss=CrossEntropy(), lr=1e-3):
        """Set the loss object and learning rate; requires at least one layer."""
        assert self.__layers_list, "Uncomplete model!"
        self.__loss = loss
        self.__lr = lr
    def forward(self, network_input):
        """Run a (batch, features) matrix through all layers in order."""
        # Bug fix: 'type(x) != None' is always true; test the value itself.
        assert self.__output_shape is not None, "Model is not compiled!"
        inp = network_input
        for layer in self.__layers_list:
            inp = layer.forward(inp)
        return inp
    def fit(self, epochs, trainloader, testloader=None, print_results=True):
        """Train for `epochs` passes; returns a dict of accuracy/loss curves."""
        assert self.__output_shape is not None, "Model is not compiled!"
        assert self.__lr is not None and self.__loss is not None, "Training paramenters are not set!"
        log = {"train_accuracy":[], "train_loss":[], "test_accuracy":[], "test_loss":[]}
        for epoch in range(1, epochs+1):
            if print_results:
                print('Epoch {}:'.format(epoch))
            # Reset the per-epoch activation captures.
            self.train_output = []
            self.test_output = []
            average_accuracy, average_loss = self.__train(trainloader)
            log['train_accuracy'].append(average_accuracy)
            log['train_loss'].append(average_loss)
            if print_results:
                print('\tTrain: Average Accuracy: {}\tAverage Loss: {}'.format(average_accuracy, average_loss))
            if testloader is not None:
                average_accuracy, average_loss = self.__test(testloader)
                log['test_accuracy'].append(average_accuracy)
                log['test_loss'].append(average_loss)
                if print_results:
                    print('\tTest: Average Accuracy: {}\tAverage Loss: {}'.format(average_accuracy, average_loss))
        return log
    def __train(self, trainloader):
        """One training epoch; returns (mean accuracy, mean loss) over batches."""
        batch_accuracies, batch_losses = [], []
        size = -1
        for x_train, y_train in trainloader:
            # Skip a trailing partial batch so every batch has the same size.
            if size == -1:
                size = x_train.shape[0]
            if x_train.shape[0] != size:
                break
            batch_accuracy, batch_loss = self.__train_on_batch(x_train, y_train)
            batch_accuracies.append(batch_accuracy)
            batch_losses.append(batch_loss)
        return np.mean(batch_accuracies), np.mean(batch_losses)
    def __test(self, testloader):
        """One evaluation pass; returns (mean accuracy, mean loss) over batches."""
        batch_accuracies, batch_losses = [], []
        size = -1
        for x_test, y_test in testloader:
            # Same trailing-partial-batch guard as __train.
            if size == -1:
                size = x_test.shape[0]
            if x_test.shape[0] != size:
                break
            batch_accuracy, batch_loss = self.__test_on_batch(x_test, y_test)
            batch_accuracies.append(batch_accuracy)
            batch_losses.append(batch_loss)
        return np.mean(batch_accuracies), np.mean(batch_losses)
    def __train_on_batch(self, x_batch, y_batch):
        """Forward, backward, and weight update for one batch."""
        output = self.forward(x_batch)
        self.__update_weights(output, y_batch)
        batch_accuracy = self.__compute_accuracy(output, y_batch)
        batch_average_loss = np.mean(self.__loss(output, y_batch))
        # NOTE(review): captures the second-to-last layer's activations for
        # plotting; assumes the network has at least two layers.
        self.train_output.append((self.__layers_list[-2].get_output(), y_batch))
        return (batch_accuracy, batch_average_loss)
    def __test_on_batch(self, x_batch, y_batch):
        """Forward pass and metrics for one batch; no weight update."""
        output = self.forward(x_batch)
        batch_accuracy = self.__compute_accuracy(output, y_batch)
        batch_average_loss = np.mean(self.__loss(output, y_batch))
        self.test_output.append((self.__layers_list[-2].get_output(), y_batch))
        return (batch_accuracy, batch_average_loss)
    def get_outputs(self):
        """Return the captured (second-to-last layer output, labels) pairs."""
        return self.train_output, self.test_output
    def __get_labels(self, outputs):
        # Bug fix: previously returned an undefined name (NameError).
        # Convert network outputs (or one-hot rows) to integer class labels.
        labels = np.asarray(np.argmax(outputs, axis=1)).reshape(-1)
        return labels
    def __compute_accuracy(self, output, expected_output):
        """Fraction of rows whose argmax matches the expected argmax."""
        true_labels = np.argmax(output, axis=1)
        expected_labels = np.argmax(expected_output, axis=1)
        assert len(true_labels) == len(expected_labels)
        accuracy = (true_labels == expected_labels).sum() / len(true_labels)
        return accuracy
    def __update_weights(self, output, y_train):
        """Backpropagate the loss gradient through the layers, last to first."""
        backprop_tensor = self.__loss.derivative(output, y_train)
        for layer in reversed(self.__layers_list):
            backprop_tensor = layer.update_weights(backprop_tensor, self.__lr)
        return
Loading and normalizing the data
# Load the raw CSVs as numpy arrays and scale pixel values into [0, 1].
train_data = np.loadtxt(open("trainData.csv", "rb"), delimiter=",")
train_labels = np.loadtxt(open("trainLabels.csv", "rb"), delimiter=",")
test_data = np.loadtxt(open("testData.csv", "rb"), delimiter=",")
test_labels = np.loadtxt(open("testLabels.csv", "rb"), delimiter=",")
ntrain_data = train_data / 255
ntest_data = test_data / 255
# Labels must be integers to index the one-hot identity matrix.
train_labels = train_labels.astype('int64')
test_labels = test_labels.astype('int64')
# Bug fix applied to all experiments below: Layer's keyword is
# 'initial_weight'. The previous 'weight_initializer=...' and
# 'input_shape=...' keywords were silently swallowed by
# **initializing_parameters and ignored, so every run used the default
# uniform initializer regardless of what was requested.

# Experiment 1: LeakyRelu layers, uniform init, lr = 1e-3.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-3
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='uniform')
network.add_layer(10, activation=LeakyRelu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)

# Experiment 2: same architecture with a larger lr = 1e-2.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='uniform')
network.add_layer(10, activation=LeakyRelu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)

# Experiment 3: normal-distribution initialization, lr = 1e-2.
# (Previously this experiment actually ran with uniform init because the
# 'normal' keyword was being ignored — see the bug-fix note above.)
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='normal')
network.add_layer(10, activation=LeakyRelu(), initial_weight='normal')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)

# Experiment 4: plain Relu layers, uniform init, lr = 1e-2.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=Relu(), initial_weight='uniform')
network.add_layer(10, activation=Relu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)

# Experiment 5: Relu layers with a large lr = 1e-1.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-1
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=Relu(), initial_weight='uniform')
network.add_layer(10, activation=Relu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)

# Experiment 6: Relu hidden layer with an Identical output layer, lr = 1e-3.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-3
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=Relu(), initial_weight='uniform')
network.add_layer(10, activation=Identical(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)
1e-3 and 1e-2 have proven to be appropriate learning rates for this model, but 1e-1 is too large a learning rate and the model didn't perform well.
# Experiment: Sigmoid hidden layer with an Identical output layer, lr = 1e-3.
# Bug fix: Layer's keyword is 'initial_weight'; the previous
# 'weight_initializer=...' and 'input_shape=...' keywords were silently
# swallowed by **initializing_parameters and ignored.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-3
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=Sigmoid(), initial_weight='uniform')
network.add_layer(10, activation=Identical(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)
class Tanh:
    """Hyperbolic tangent activation, applied elementwise."""
    def __init__(self): pass
    def __f(self, x):
        # Bug fix: the previous value function returned 2/(1+exp(-2x)),
        # which equals tanh(x) + 1, so the "tanh" outputs lay in (0, 2)
        # instead of (-1, 1). math.tanh is also numerically stable where
        # math.exp(-2*x) would overflow for large negative x.
        return math.tanh(x)
    def __df(self, x):
        # d/dx tanh(x) = 1 - tanh(x)^2 (equivalent to the original
        # 4*sigmoid(2x)*(1-sigmoid(2x)) form, but overflow-safe).
        t = math.tanh(x)
        return 1 - t*t
    def __val(self, matrix):
        func = np.vectorize(self.__f)
        tanh_value = func(matrix)
        return tanh_value
    def derivative(self, matrix):
        func = np.vectorize(self.__df)
        tanh_derivative = func(matrix)
        return tanh_derivative
    def __call__(self, matrix):
        return self.__val(matrix)
# Experiment: Tanh layers, uniform init, lr = 1e-2.
# Bug fix: Layer's keyword is 'initial_weight'; the previous
# 'weight_initializer=...' and 'input_shape=...' keywords were silently
# swallowed by **initializing_parameters and ignored.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=Tanh(), initial_weight='uniform')
network.add_layer(10, activation=Tanh(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)
Sigmoid and Tanh have very small derivatives for very large or very small input values, and this derivative, which is used for updating the weights, becomes very small and almost stops the weights from changing. So using these activation functions, especially in deep networks, wouldn't be a good choice.
ReLU turns the output and its derivative to zero for negative values. This makes the neuron contribute nothing to the output and nothing to the update process, so using LeakyReLU, which doesn't completely ignore negative values, would be a better choice.
Choosing a large or a small batch size comes with pros and cons. Large batches allow faster computation and a more reliable loss value, since it is the mean over more data instances, but they reduce how often the weights are updated. Small batches make the updating process more frequent but also less representative of the whole data, which may be good when we want to escape local optima — but it's not wise to choose a very small batch size.
# Batch-size experiments with the LeakyRelu architecture, lr = 1e-2.
# Bug fix: Layer's keyword is 'initial_weight'; the previous
# 'weight_initializer=...' and 'input_shape=...' keywords were silently
# swallowed by **initializing_parameters and ignored.

# Small batches (16): more frequent, noisier updates.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 16, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 16, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='uniform')
network.add_layer(10, activation=LeakyRelu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)

# Large batches (128): fewer, smoother updates per epoch.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 128, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 128, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='uniform')
network.add_layer(10, activation=LeakyRelu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)
Overfitting is the result of training the model too much, which makes it too specific to the training data, so it loses generality when facing new data (the test data). During overfitting, the result on the training data keeps improving while the result on the test data degrades at some point; this can be prevented using early stopping.
# Long run (200 epochs) to expose overfitting in the accuracy/loss curves.
# Bug fix: Layer's keyword is 'initial_weight'; the previous
# 'weight_initializer=...' and 'input_shape=...' keywords were silently
# swallowed by **initializing_parameters and ignored.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-3
EPOCHS = 200
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='uniform')
network.add_layer(10, activation=LeakyRelu(), initial_weight='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)
# Accuracy curves: train vs test across epochs.
plt.figure(figsize=(10, 10))
plt.plot(log['train_accuracy'], label='train')
plt.plot(log['test_accuracy'], label='test')
# Single-argument ylim sets only the lower bound (zoom in above 0.8).
plt.ylim(0.8)
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Loss curves: train vs test across epochs.
plt.figure(figsize=(10, 10))
plt.plot(log['train_loss'], label='train')
plt.plot(log['test_loss'], label='test')
plt.ylim(0, 1)
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
Here we use a 2-neuron layer so we can plot its outputs and see how the layers before it encode features.
# Insert a 2-neuron bottleneck before the output layer so the learned
# representation can be plotted in 2-D.
# Bug fix: 'weight_initializer="normal"' was silently ignored (Layer's
# keyword is 'initial_weight'), so these layers were actually initialized
# uniformly; 'initial_weight' applies the normal initializer as intended.
INPUT_SHAPE = len(ntrain_data[0])
LEARNING_RATE = 1e-2
EPOCHS = 30
TRAINLOADER = Dataloader(ntrain_data, train_labels, 10, 32, True)
TESTLOADER = Dataloader(ntest_data, test_labels, 10, 32, True)
network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, activation=LeakyRelu(), initial_weight='normal')
network.add_layer(2, activation=LeakyRelu(), initial_weight='normal')
network.add_layer(10, activation=LeakyRelu(), initial_weight='normal')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)
log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER, print_results=True)
# Fetch the captured (2-neuron layer output, one-hot labels) pairs and
# scatter-plot them, coloring each point by its true class.
train_results, test_results = network.get_outputs()
import seaborn as snb
colors = ['red', 'blue', 'purple', 'green', 'yellow', 'orange', 'pink', 'black', 'gray', 'cyan']
plt.figure(figsize=(10, 10))
for outputs, onehots in train_results:
    xs = np.array(outputs[:, 0])
    ys = np.array(outputs[:, 1])
    labels = np.array(onehots.argmax(1)).reshape(-1)
    plt.scatter(xs, ys, color=[colors[lbl] for lbl in labels])
plt.title('Train results of the 2nouron layer')
plt.show()
plt.figure(figsize=(10, 10))
for outputs, onehots in test_results:
    xs = np.array(outputs[:, 0])
    ys = np.array(outputs[:, 1])
    labels = np.array(onehots.argmax(1)).reshape(-1)
    plt.scatter(xs, ys, color=[colors[lbl] for lbl in labels])
plt.title('Test results of the 2nouron layer')
plt.show()
As we can see, data with similar labels have close output values in the 2-neuron layer.